This is a basic analysis of the Google Trends time series for the search term ‘Saturday Night Live’.
# Install whichever of the required packages are not yet present locally.
pkgs <- c(
  "dplyr", "tsibble", "tidyverse",
  "plotly", "lubridate", "feasts"
)
new <- setdiff(pkgs, installed.packages()[, "Package"])
if (length(new) > 0) {
  install.packages(new)
}
library(dplyr)
library(tsibble)
library(tidyverse)
library(plotly)
library(lubridate)
library(feasts)
# Read the weekly search-interest CSV into a tibble.
data <- read_csv('SNL.csv')
##
## -- Column specification --------------------------------------------------------
## cols(
## Week = col_character(),
## `Saturday Night Live: (Worldwide)` = col_double()
## )
# Replace the long export headers with short, syntactic column names.
colnames(data) <- c("week", "searches")
# as_tibble() replaces the deprecated data_frame(); the result is still a
# plain two-column tibble.
data <- as_tibble(data)
head(data)
## # A tibble: 6 x 2
## week searches
## <chr> <dbl>
## 1 27-11-2016 25
## 2 04-12-2016 41
## 3 11-12-2016 36
## 4 18-12-2016 53
## 5 25-12-2016 23
## 6 01-01-2017 22
The data has two columns: the week (recorded weekly) and the number of searches for that week.
# Parse the day-month-year strings into Date values and tag every row with a
# constant key, then build a tsibble indexed by week.
data <- data %>%
  mutate(
    week = as.Date(week, format = "%d-%m-%Y"),
    key = "K1"
  )
df <- as_tsibble(data, key = key, index = week)
# Count the missing values in each column. vapply() guarantees exactly one
# integer per column, whereas sapply()'s return type depends on its input.
vapply(df, function(x) sum(is.na(x)), integer(1))
## week searches key
## 0 0 0
This data has no missing values.
# Interactive line chart of the weekly searches. The x-axis range starts a few
# days before the first observation and ends a few days after the last one.
date_range <- as.Date(c("22-11-2016", "16-11-2021"), format = "%d-%m-%Y")
fig <- plot_ly(
  df,
  x = ~week,
  y = ~searches,
  type = "scatter",
  mode = "lines+markers"
)
fig <- layout(
  fig,
  title = "<b>Searches for *Saturday Night Live*</b>",
  xaxis = list(title = "Weeks", range = date_range),
  yaxis = list(title = "Searches"),
  # Caption anchored to the right-hand end of the x-axis range, below y = 0.
  annotations = list(
    text = "This data consists weekly records from 27-Nov-2016 to 14-Nov-2021",
    showarrow = FALSE,
    x = date_range[2],
    y = -2,
    xanchor = "right"
  )
)
fig
We can see a clear pattern: an elevated number of searches during certain parts of the year and a relatively lower number of searches during the rest. This can be attributed to the show airing only during a specific period of each year.
# Roll the weekly series up to calendar months and compute, for each month,
# the number of weekly records plus the mean, total, minimum and maximum
# of the searches.
monthly_view <- df %>%
  group_by_key() %>%
  index_by(Year_Month = yearmonth(week)) %>%
  summarise(
    Number_of_Weeks = n(),
    Average = mean(searches, na.rm = TRUE),
    Total = sum(searches, na.rm = TRUE),
    Mininmum = min(searches, na.rm = TRUE),
    Maximum = max(searches, na.rm = TRUE)
  )
# Show columns 2 to 7 only; the first column is presumably the constant key.
monthly_view[, 2:7]
## # A tsibble: 61 x 6 [1M]
## Year_Month Number_of_Weeks Average Total Mininmum Maximum
## <mth> <int> <dbl> <dbl> <dbl> <dbl>
## 1 2016 Nov 1 25 25 25 25
## 2 2016 Dec 4 38.2 153 23 53
## 3 2017 Jan 5 33.6 168 21 62
## 4 2017 Feb 4 66.2 265 28 100
## 5 2017 Mar 4 35 140 25 46
## 6 2017 Apr 5 29.2 146 18 49
## 7 2017 May 4 36 144 20 48
## 8 2017 Jun 4 14.5 58 13 16
## 9 2017 Jul 5 14 70 13 15
## 10 2017 Aug 4 16.5 66 13 19
## # ... with 51 more rows